sli1989
diff --git a/‎dev/_downloads/auto_examples_jupyter.zip
145 Bytes b/‎dev/_downloads/auto_examples_jupyter.zip
145 Bytes
diff --git a/‎dev/_downloads/auto_examples_python.zip
141 Bytes b/‎dev/_downloads/auto_examples_python.zip
141 Bytes
diff --git a/‎dev/_downloads/plot_stock_market.ipynb
Lines changed: 1 addition & 1 deletion b/‎dev/_downloads/plot_stock_market.ipynb
Lines changed: 1 addition & 1 deletion
diff --git a/‎dev/_downloads/plot_stock_market.py
Lines changed: 10 additions & 6 deletions b/‎dev/_downloads/plot_stock_market.py
Lines changed: 10 additions & 6 deletions
diff --git a/‎dev/_downloads/scikit-learn-docs.pdf
-6.27 KB b/‎dev/_downloads/scikit-learn-docs.pdf
-6.27 KB
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-68 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_001.png
-68 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
-68 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0011.png
-68 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
-1 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_002.png
-1 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0021.png
-1 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_0021.png
-1 Bytes
diff --git a/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
95 Bytes b/‎dev/_images/sphx_glr_plot_agglomerative_clustering_003.png
95 Bytes
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "print(__doc__)\n\n# Author: Gael Varoquaux [email protected]\n# License: BSD 3 clause\n\nfrom datetime import datetime\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.collections import LineCollection\nfrom six.moves.urllib.request import urlopen\nfrom six.moves.urllib.parse import urlencode\nfrom sklearn import cluster, covariance, manifold\n\n\n# #############################################################################\n# Retrieve the data from Internet\n\ndef retry(f, n_attempts=3):\n    \"Wrapper function to retry function calls in case of exceptions\"\n    def wrapper(*args, **kwargs):\n        for i in range(n_attempts):\n            try:\n                return f(*args, **kwargs)\n            except Exception as e:\n                if i == n_attempts - 1:\n                    raise\n    return wrapper\n\n\ndef quotes_historical_google(symbol, date1, date2):\n    \"\"\"Get the historical data from Google finance.\n\n    Parameters\n    ----------\n    symbol : str\n        Ticker symbol to query for, for example ``\"DELL\"``.\n    date1 : datetime.datetime\n        Start date.\n    date2 : datetime.datetime\n        End date.\n\n    Returns\n    -------\n    X : array\n        The columns are ``date`` -- datetime, ``open``, ``high``,\n        ``low``, ``close`` and ``volume`` of type float.\n    \"\"\"\n    params = urlencode({\n        'q': symbol,\n        'startdate': date1.strftime('%b %d, %Y'),\n        'enddate': date2.strftime('%b %d, %Y'),\n        'output': 'csv'\n    })\n    url = 'http://www.google.com/finance/historical?' 
+ params\n    response = urlopen(url)\n    dtype = {\n        'names': ['date', 'open', 'high', 'low', 'close', 'volume'],\n        'formats': ['object', 'f4', 'f4', 'f4', 'f4', 'f4']\n    }\n    converters = {0: lambda s: datetime.strptime(s.decode(), '%d-%b-%y')}\n    return np.genfromtxt(response, delimiter=',', skip_header=1,\n                         dtype=dtype, converters=converters,\n                         missing_values='-', filling_values=-1)\n\n\n# Choose a time period reasonably calm (not too long ago so that we get\n# high-tech firms, and before the 2008 crash)\nd1 = datetime(2003, 1, 1)\nd2 = datetime(2008, 1, 1)\n\nsymbol_dict = {\n    'TOT': 'Total',\n    'XOM': 'Exxon',\n    'CVX': 'Chevron',\n    'COP': 'ConocoPhillips',\n    'VLO': 'Valero Energy',\n    'MSFT': 'Microsoft',\n    'IBM': 'IBM',\n    'TWX': 'Time Warner',\n    'CMCSA': 'Comcast',\n    'CVC': 'Cablevision',\n    'YHOO': 'Yahoo',\n    'DELL': 'Dell',\n    'HPQ': 'HP',\n    'AMZN': 'Amazon',\n    'TM': 'Toyota',\n    'CAJ': 'Canon',\n    'SNE': 'Sony',\n    'F': 'Ford',\n    'HMC': 'Honda',\n    'NAV': 'Navistar',\n    'NOC': 'Northrop Grumman',\n    'BA': 'Boeing',\n    'KO': 'Coca Cola',\n    'MMM': '3M',\n    'MCD': 'McDonald\\'s',\n    'PEP': 'Pepsi',\n    'K': 'Kellogg',\n    'UN': 'Unilever',\n    'MAR': 'Marriott',\n    'PG': 'Procter Gamble',\n    'CL': 'Colgate-Palmolive',\n    'GE': 'General Electrics',\n    'WFC': 'Wells Fargo',\n    'JPM': 'JPMorgan Chase',\n    'AIG': 'AIG',\n    'AXP': 'American express',\n    'BAC': 'Bank of America',\n    'GS': 'Goldman Sachs',\n    'AAPL': 'Apple',\n    'SAP': 'SAP',\n    'CSCO': 'Cisco',\n    'TXN': 'Texas Instruments',\n    'XRX': 'Xerox',\n    'WMT': 'Wal-Mart',\n    'HD': 'Home Depot',\n    'GSK': 'GlaxoSmithKline',\n    'PFE': 'Pfizer',\n    'SNY': 'Sanofi-Aventis',\n    'NVS': 'Novartis',\n    'KMB': 'Kimberly-Clark',\n    'R': 'Ryder',\n    'GD': 'General Dynamics',\n    'RTN': 'Raytheon',\n    'CVS': 'CVS',\n    'CAT': 
'Caterpillar',\n    'DD': 'DuPont de Nemours'}\n\nsymbols, names = np.array(list(symbol_dict.items())).T\n\n# retry is used because quotes_historical_google can temporarily fail\n# for various reasons (e.g. empty result from Google API).\nquotes = [\n    retry(quotes_historical_google)(symbol, d1, d2) for symbol in symbols\n]\n\nclose_prices = np.vstack([q['close'] for q in quotes])\nopen_prices = np.vstack([q['open'] for q in quotes])\n\n# The daily variations of the quotes are what carry most information\nvariation = close_prices - open_prices\n\n\n# #############################################################################\n# Learn a graphical structure from the correlations\nedge_model = covariance.GraphLassoCV()\n\n# standardize the time series: using correlations rather than covariance\n# is more efficient for structure recovery\nX = variation.copy().T\nX /= X.std(axis=0)\nedge_model.fit(X)\n\n# #############################################################################\n# Cluster using affinity propagation\n\n_, labels = cluster.affinity_propagation(edge_model.covariance_)\nn_labels = labels.max()\n\nfor i in range(n_labels + 1):\n    print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))\n\n# #############################################################################\n# Find a low-dimension embedding for visualization: find the best position of\n# the nodes (the stocks) on a 2D plane\n\n# We use a dense eigen_solver to achieve reproducibility (arpack is\n# initiated with random vectors that we don't control). 
In addition, we\n# use a large number of neighbors to capture the large-scale structure.\nnode_position_model = manifold.LocallyLinearEmbedding(\n    n_components=2, eigen_solver='dense', n_neighbors=6)\n\nembedding = node_position_model.fit_transform(X.T).T\n\n# #############################################################################\n# Visualization\nplt.figure(1, facecolor='w', figsize=(10, 8))\nplt.clf()\nax = plt.axes([0., 0., 1., 1.])\nplt.axis('off')\n\n# Display a graph of the partial correlations\npartial_correlations = edge_model.precision_.copy()\nd = 1 / np.sqrt(np.diag(partial_correlations))\npartial_correlations *= d\npartial_correlations *= d[:, np.newaxis]\nnon_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)\n\n# Plot the nodes using the coordinates of our embedding\nplt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,\n            cmap=plt.cm.spectral)\n\n# Plot the edges\nstart_idx, end_idx = np.where(non_zero)\n# a sequence of (*line0*, *line1*, *line2*), where::\n#            linen = (x0, y0), (x1, y1), ... (xm, ym)\nsegments = [[embedding[:, start], embedding[:, stop]]\n            for start, stop in zip(start_idx, end_idx)]\nvalues = np.abs(partial_correlations[non_zero])\nlc = LineCollection(segments,\n                    zorder=0, cmap=plt.cm.hot_r,\n                    norm=plt.Normalize(0, .7 * values.max()))\nlc.set_array(values)\nlc.set_linewidths(15 * values)\nax.add_collection(lc)\n\n# Add a label to each node. 
The challenge here is that we want to\n# position the labels to avoid overlap with other labels\nfor index, (name, label, (x, y)) in enumerate(\n        zip(names, labels, embedding.T)):\n\n    dx = x - embedding[0]\n    dx[index] = 1\n    dy = y - embedding[1]\n    dy[index] = 1\n    this_dx = dx[np.argmin(np.abs(dy))]\n    this_dy = dy[np.argmin(np.abs(dx))]\n    if this_dx > 0:\n        horizontalalignment = 'left'\n        x = x + .002\n    else:\n        horizontalalignment = 'right'\n        x = x - .002\n    if this_dy > 0:\n        verticalalignment = 'bottom'\n        y = y + .002\n    else:\n        verticalalignment = 'top'\n        y = y - .002\n    plt.text(x, y, name, size=10,\n             horizontalalignment=horizontalalignment,\n             verticalalignment=verticalalignment,\n             bbox=dict(facecolor='w',\n                       edgecolor=plt.cm.spectral(label / float(n_labels)),\n                       alpha=.6))\n\nplt.xlim(embedding[0].min() - .15 * embedding[0].ptp(),\n         embedding[0].max() + .10 * embedding[0].ptp(),)\nplt.ylim(embedding[1].min() - .03 * embedding[1].ptp(),\n         embedding[1].max() + .03 * embedding[1].ptp())\n\nplt.show()"
+        "from __future__ import print_function\n\n# Author: Gael Varoquaux [email protected]\n# License: BSD 3 clause\n\nimport sys\nfrom datetime import datetime\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.collections import LineCollection\nfrom six.moves.urllib.request import urlopen\nfrom six.moves.urllib.parse import urlencode\nfrom sklearn import cluster, covariance, manifold\n\nprint(__doc__)\n\n# #############################################################################\n# Retrieve the data from Internet\n\ndef retry(f, n_attempts=3):\n    \"Wrapper function to retry function calls in case of exceptions\"\n    def wrapper(*args, **kwargs):\n        for i in range(n_attempts):\n            try:\n                return f(*args, **kwargs)\n            except Exception as e:\n                if i == n_attempts - 1:\n                    raise\n    return wrapper\n\n\ndef quotes_historical_google(symbol, date1, date2):\n    \"\"\"Get the historical data from Google finance.\n\n    Parameters\n    ----------\n    symbol : str\n        Ticker symbol to query for, for example ``\"DELL\"``.\n    date1 : datetime.datetime\n        Start date.\n    date2 : datetime.datetime\n        End date.\n\n    Returns\n    -------\n    X : array\n        The columns are ``date`` -- datetime, ``open``, ``high``,\n        ``low``, ``close`` and ``volume`` of type float.\n    \"\"\"\n    params = urlencode({\n        'q': symbol,\n        'startdate': date1.strftime('%b %d, %Y'),\n        'enddate': date2.strftime('%b %d, %Y'),\n        'output': 'csv'\n    })\n    url = 'http://www.google.com/finance/historical?' 
+ params\n    response = urlopen(url)\n    dtype = {\n        'names': ['date', 'open', 'high', 'low', 'close', 'volume'],\n        'formats': ['object', 'f4', 'f4', 'f4', 'f4', 'f4']\n    }\n    converters = {0: lambda s: datetime.strptime(s.decode(), '%d-%b-%y')}\n    return np.genfromtxt(response, delimiter=',', skip_header=1,\n                         dtype=dtype, converters=converters,\n                         missing_values='-', filling_values=-1)\n\n\n# Choose a time period reasonably calm (not too long ago so that we get\n# high-tech firms, and before the 2008 crash)\nd1 = datetime(2003, 1, 1)\nd2 = datetime(2008, 1, 1)\n\nsymbol_dict = {\n    'TOT': 'Total',\n    'XOM': 'Exxon',\n    'CVX': 'Chevron',\n    'COP': 'ConocoPhillips',\n    'VLO': 'Valero Energy',\n    'MSFT': 'Microsoft',\n    'IBM': 'IBM',\n    'TWX': 'Time Warner',\n    'CMCSA': 'Comcast',\n    'CVC': 'Cablevision',\n    'YHOO': 'Yahoo',\n    'DELL': 'Dell',\n    'HPQ': 'HP',\n    'AMZN': 'Amazon',\n    'TM': 'Toyota',\n    'CAJ': 'Canon',\n    'SNE': 'Sony',\n    'F': 'Ford',\n    'HMC': 'Honda',\n    'NAV': 'Navistar',\n    'NOC': 'Northrop Grumman',\n    'BA': 'Boeing',\n    'KO': 'Coca Cola',\n    'MMM': '3M',\n    'MCD': 'McDonald\\'s',\n    'PEP': 'Pepsi',\n    'K': 'Kellogg',\n    'UN': 'Unilever',\n    'MAR': 'Marriott',\n    'PG': 'Procter Gamble',\n    'CL': 'Colgate-Palmolive',\n    'GE': 'General Electrics',\n    'WFC': 'Wells Fargo',\n    'JPM': 'JPMorgan Chase',\n    'AIG': 'AIG',\n    'AXP': 'American express',\n    'BAC': 'Bank of America',\n    'GS': 'Goldman Sachs',\n    'AAPL': 'Apple',\n    'NYSE:SAP': 'SAP',\n    'CSCO': 'Cisco',\n    'TXN': 'Texas Instruments',\n    'XRX': 'Xerox',\n    'WMT': 'Wal-Mart',\n    'HD': 'Home Depot',\n    'GSK': 'GlaxoSmithKline',\n    'PFE': 'Pfizer',\n    'SNY': 'Sanofi-Aventis',\n    'NVS': 'Novartis',\n    'KMB': 'Kimberly-Clark',\n    'R': 'Ryder',\n    'GD': 'General Dynamics',\n    'RTN': 'Raytheon',\n    'CVS': 'CVS',\n    'CAT': 
'Caterpillar',\n    'DD': 'DuPont de Nemours'}\n\nsymbols, names = np.array(sorted(symbol_dict.items())).T\n\n# retry is used because quotes_historical_google can temporarily fail\n# for various reasons (e.g. empty result from Google API).\nquotes = []\n\nfor symbol in symbols:\n    print('Fetching quote history for %r' % symbol, file=sys.stderr)\n    quotes.append(retry(quotes_historical_google)(symbol, d1, d2))\n\nclose_prices = np.vstack([q['close'] for q in quotes])\nopen_prices = np.vstack([q['open'] for q in quotes])\n\n# The daily variations of the quotes are what carry most information\nvariation = close_prices - open_prices\n\n\n# #############################################################################\n# Learn a graphical structure from the correlations\nedge_model = covariance.GraphLassoCV()\n\n# standardize the time series: using correlations rather than covariance\n# is more efficient for structure recovery\nX = variation.copy().T\nX /= X.std(axis=0)\nedge_model.fit(X)\n\n# #############################################################################\n# Cluster using affinity propagation\n\n_, labels = cluster.affinity_propagation(edge_model.covariance_)\nn_labels = labels.max()\n\nfor i in range(n_labels + 1):\n    print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))\n\n# #############################################################################\n# Find a low-dimension embedding for visualization: find the best position of\n# the nodes (the stocks) on a 2D plane\n\n# We use a dense eigen_solver to achieve reproducibility (arpack is\n# initiated with random vectors that we don't control). 
In addition, we\n# use a large number of neighbors to capture the large-scale structure.\nnode_position_model = manifold.LocallyLinearEmbedding(\n    n_components=2, eigen_solver='dense', n_neighbors=6)\n\nembedding = node_position_model.fit_transform(X.T).T\n\n# #############################################################################\n# Visualization\nplt.figure(1, facecolor='w', figsize=(10, 8))\nplt.clf()\nax = plt.axes([0., 0., 1., 1.])\nplt.axis('off')\n\n# Display a graph of the partial correlations\npartial_correlations = edge_model.precision_.copy()\nd = 1 / np.sqrt(np.diag(partial_correlations))\npartial_correlations *= d\npartial_correlations *= d[:, np.newaxis]\nnon_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)\n\n# Plot the nodes using the coordinates of our embedding\nplt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,\n            cmap=plt.cm.spectral)\n\n# Plot the edges\nstart_idx, end_idx = np.where(non_zero)\n# a sequence of (*line0*, *line1*, *line2*), where::\n#            linen = (x0, y0), (x1, y1), ... (xm, ym)\nsegments = [[embedding[:, start], embedding[:, stop]]\n            for start, stop in zip(start_idx, end_idx)]\nvalues = np.abs(partial_correlations[non_zero])\nlc = LineCollection(segments,\n                    zorder=0, cmap=plt.cm.hot_r,\n                    norm=plt.Normalize(0, .7 * values.max()))\nlc.set_array(values)\nlc.set_linewidths(15 * values)\nax.add_collection(lc)\n\n# Add a label to each node. 
The challenge here is that we want to\n# position the labels to avoid overlap with other labels\nfor index, (name, label, (x, y)) in enumerate(\n        zip(names, labels, embedding.T)):\n\n    dx = x - embedding[0]\n    dx[index] = 1\n    dy = y - embedding[1]\n    dy[index] = 1\n    this_dx = dx[np.argmin(np.abs(dy))]\n    this_dy = dy[np.argmin(np.abs(dx))]\n    if this_dx > 0:\n        horizontalalignment = 'left'\n        x = x + .002\n    else:\n        horizontalalignment = 'right'\n        x = x - .002\n    if this_dy > 0:\n        verticalalignment = 'bottom'\n        y = y + .002\n    else:\n        verticalalignment = 'top'\n        y = y - .002\n    plt.text(x, y, name, size=10,\n             horizontalalignment=horizontalalignment,\n             verticalalignment=verticalalignment,\n             bbox=dict(facecolor='w',\n                       edgecolor=plt.cm.spectral(label / float(n_labels)),\n                       alpha=.6))\n\nplt.xlim(embedding[0].min() - .15 * embedding[0].ptp(),\n         embedding[0].max() + .10 * embedding[0].ptp(),)\nplt.ylim(embedding[1].min() - .03 * embedding[1].ptp(),\n         embedding[1].max() + .03 * embedding[1].ptp())\n\nplt.show()"
       ]
     }
   ],
 
@@ -59,11 +59,12 @@
 heuristic based on the direction of the nearest neighbor along each
 axis.
 """
-print(__doc__)
+from __future__ import print_function
 
 # Author: Gael Varoquaux [email protected]
 # License: BSD 3 clause
 
+import sys
 from datetime import datetime
 
 import numpy as np
@@ -73,6 +74,7 @@
 from six.moves.urllib.parse import urlencode
 from sklearn import cluster, covariance, manifold
 
+print(__doc__)
 
 # #############################################################################
 # Retrieve the data from Internet
@@ -170,7 +172,7 @@ def quotes_historical_google(symbol, date1, date2):
     'BAC': 'Bank of America',
     'GS': 'Goldman Sachs',
     'AAPL': 'Apple',
-    'SAP': 'SAP',
+    'NYSE:SAP': 'SAP',
     'CSCO': 'Cisco',
     'TXN': 'Texas Instruments',
     'XRX': 'Xerox',
@@ -188,13 +190,15 @@ def quotes_historical_google(symbol, date1, date2):
     'CAT': 'Caterpillar',
     'DD': 'DuPont de Nemours'}
 
-symbols, names = np.array(list(symbol_dict.items())).T
+symbols, names = np.array(sorted(symbol_dict.items())).T
 
 # retry is used because quotes_historical_google can temporarily fail
 # for various reasons (e.g. empty result from Google API).
-quotes = [
-    retry(quotes_historical_google)(symbol, d1, d2) for symbol in symbols
-]
+quotes = []
+
+for symbol in symbols:
+    print('Fetching quote history for %r' % symbol, file=sys.stderr)
+    quotes.append(retry(quotes_historical_google)(symbol, d1, d2))
 
 close_prices = np.vstack([q['close'] for q in quotes])
 open_prices = np.vstack([q['open'] for q in quotes])
Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,7 @@`
`26`	`26`	`},`
`27`	`27`	`"outputs": [],`
`28`	`28`	`"source": [`
`29`		- "print(__doc__)\n\n# Author: Gael Varoquaux [email protected]\n# License: BSD 3 clause\n\nfrom datetime import datetime\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.collections import LineCollection\nfrom six.moves.urllib.request import urlopen\nfrom six.moves.urllib.parse import urlencode\nfrom sklearn import cluster, covariance, manifold\n\n\n# #############################################################################\n# Retrieve the data from Internet\n\ndef retry(f, n_attempts=3):\n \"Wrapper function to retry function calls in case of exceptions\"\n def wrapper(*args, **kwargs):\n for i in range(n_attempts):\n try:\n return f(*args, **kwargs)\n except Exception as e:\n if i == n_attempts - 1:\n raise\n return wrapper\n\n\ndef quotes_historical_google(symbol, date1, date2):\n \"\"\"Get the historical data from Google finance.\n\n Parameters\n ----------\n symbol : str\n Ticker symbol to query for, for example ``\"DELL\"``.\n date1 : datetime.datetime\n Start date.\n date2 : datetime.datetime\n End date.\n\n Returns\n -------\n X : array\n The columns are ``date`` -- datetime, ``open``, ``high``,\n ``low``, ``close`` and ``volume`` of type float.\n \"\"\"\n params = urlencode({\n 'q': symbol,\n 'startdate': date1.strftime('%b %d, %Y'),\n 'enddate': date2.strftime('%b %d, %Y'),\n 'output': 'csv'\n })\n url = 'http://www.google.com/finance/historical?' 
+ params\n response = urlopen(url)\n dtype = {\n 'names': ['date', 'open', 'high', 'low', 'close', 'volume'],\n 'formats': ['object', 'f4', 'f4', 'f4', 'f4', 'f4']\n }\n converters = {0: lambda s: datetime.strptime(s.decode(), '%d-%b-%y')}\n return np.genfromtxt(response, delimiter=',', skip_header=1,\n dtype=dtype, converters=converters,\n missing_values='-', filling_values=-1)\n\n\n# Choose a time period reasonably calm (not too long ago so that we get\n# high-tech firms, and before the 2008 crash)\nd1 = datetime(2003, 1, 1)\nd2 = datetime(2008, 1, 1)\n\nsymbol_dict = {\n 'TOT': 'Total',\n 'XOM': 'Exxon',\n 'CVX': 'Chevron',\n 'COP': 'ConocoPhillips',\n 'VLO': 'Valero Energy',\n 'MSFT': 'Microsoft',\n 'IBM': 'IBM',\n 'TWX': 'Time Warner',\n 'CMCSA': 'Comcast',\n 'CVC': 'Cablevision',\n 'YHOO': 'Yahoo',\n 'DELL': 'Dell',\n 'HPQ': 'HP',\n 'AMZN': 'Amazon',\n 'TM': 'Toyota',\n 'CAJ': 'Canon',\n 'SNE': 'Sony',\n 'F': 'Ford',\n 'HMC': 'Honda',\n 'NAV': 'Navistar',\n 'NOC': 'Northrop Grumman',\n 'BA': 'Boeing',\n 'KO': 'Coca Cola',\n 'MMM': '3M',\n 'MCD': 'McDonald\\'s',\n 'PEP': 'Pepsi',\n 'K': 'Kellogg',\n 'UN': 'Unilever',\n 'MAR': 'Marriott',\n 'PG': 'Procter Gamble',\n 'CL': 'Colgate-Palmolive',\n 'GE': 'General Electrics',\n 'WFC': 'Wells Fargo',\n 'JPM': 'JPMorgan Chase',\n 'AIG': 'AIG',\n 'AXP': 'American express',\n 'BAC': 'Bank of America',\n 'GS': 'Goldman Sachs',\n 'AAPL': 'Apple',\n 'SAP': 'SAP',\n 'CSCO': 'Cisco',\n 'TXN': 'Texas Instruments',\n 'XRX': 'Xerox',\n 'WMT': 'Wal-Mart',\n 'HD': 'Home Depot',\n 'GSK': 'GlaxoSmithKline',\n 'PFE': 'Pfizer',\n 'SNY': 'Sanofi-Aventis',\n 'NVS': 'Novartis',\n 'KMB': 'Kimberly-Clark',\n 'R': 'Ryder',\n 'GD': 'General Dynamics',\n 'RTN': 'Raytheon',\n 'CVS': 'CVS',\n 'CAT': 'Caterpillar',\n 'DD': 'DuPont de Nemours'}\n\nsymbols, names = np.array(list(symbol_dict.items())).T\n\n# retry is used because quotes_historical_google can temporarily fail\n# for various reasons (e.g. 
empty result from Google API).\nquotes = [\n retry(quotes_historical_google)(symbol, d1, d2) for symbol in symbols\n]\n\nclose_prices = np.vstack([q['close'] for q in quotes])\nopen_prices = np.vstack([q['open'] for q in quotes])\n\n# The daily variations of the quotes are what carry most information\nvariation = close_prices - open_prices\n\n\n# #############################################################################\n# Learn a graphical structure from the correlations\nedge_model = covariance.GraphLassoCV()\n\n# standardize the time series: using correlations rather than covariance\n# is more efficient for structure recovery\nX = variation.copy().T\nX /= X.std(axis=0)\nedge_model.fit(X)\n\n# #############################################################################\n# Cluster using affinity propagation\n\n_, labels = cluster.affinity_propagation(edge_model.covariance_)\nn_labels = labels.max()\n\nfor i in range(n_labels + 1):\n print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))\n\n# #############################################################################\n# Find a low-dimension embedding for visualization: find the best position of\n# the nodes (the stocks) on a 2D plane\n\n# We use a dense eigen_solver to achieve reproducibility (arpack is\n# initiated with random vectors that we don't control). 
In addition, we\n# use a large number of neighbors to capture the large-scale structure.\nnode_position_model = manifold.LocallyLinearEmbedding(\n n_components=2, eigen_solver='dense', n_neighbors=6)\n\nembedding = node_position_model.fit_transform(X.T).T\n\n# #############################################################################\n# Visualization\nplt.figure(1, facecolor='w', figsize=(10, 8))\nplt.clf()\nax = plt.axes([0., 0., 1., 1.])\nplt.axis('off')\n\n# Display a graph of the partial correlations\npartial_correlations = edge_model.precision_.copy()\nd = 1 / np.sqrt(np.diag(partial_correlations))\npartial_correlations *= d\npartial_correlations *= d[:, np.newaxis]\nnon_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)\n\n# Plot the nodes using the coordinates of our embedding\nplt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,\n cmap=plt.cm.spectral)\n\n# Plot the edges\nstart_idx, end_idx = np.where(non_zero)\n# a sequence of (*line0*, *line1*, *line2*), where::\n# linen = (x0, y0), (x1, y1), ... (xm, ym)\nsegments = [[embedding[:, start], embedding[:, stop]]\n for start, stop in zip(start_idx, end_idx)]\nvalues = np.abs(partial_correlations[non_zero])\nlc = LineCollection(segments,\n zorder=0, cmap=plt.cm.hot_r,\n norm=plt.Normalize(0, .7 * values.max()))\nlc.set_array(values)\nlc.set_linewidths(15 * values)\nax.add_collection(lc)\n\n# Add a label to each node. 
The challenge here is that we want to\n# position the labels to avoid overlap with other labels\nfor index, (name, label, (x, y)) in enumerate(\n zip(names, labels, embedding.T)):\n\n dx = x - embedding[0]\n dx[index] = 1\n dy = y - embedding[1]\n dy[index] = 1\n this_dx = dx[np.argmin(np.abs(dy))]\n this_dy = dy[np.argmin(np.abs(dx))]\n if this_dx > 0:\n horizontalalignment = 'left'\n x = x + .002\n else:\n horizontalalignment = 'right'\n x = x - .002\n if this_dy > 0:\n verticalalignment = 'bottom'\n y = y + .002\n else:\n verticalalignment = 'top'\n y = y - .002\n plt.text(x, y, name, size=10,\n horizontalalignment=horizontalalignment,\n verticalalignment=verticalalignment,\n bbox=dict(facecolor='w',\n edgecolor=plt.cm.spectral(label / float(n_labels)),\n alpha=.6))\n\nplt.xlim(embedding[0].min() - .15 * embedding[0].ptp(),\n embedding[0].max() + .10 * embedding[0].ptp(),)\nplt.ylim(embedding[1].min() - .03 * embedding[1].ptp(),\n embedding[1].max() + .03 * embedding[1].ptp())\n\nplt.show()"
	`29`	+ "from __future__ import print_function\n\n# Author: Gael Varoquaux [email protected]\n# License: BSD 3 clause\n\nimport sys\nfrom datetime import datetime\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom matplotlib.collections import LineCollection\nfrom six.moves.urllib.request import urlopen\nfrom six.moves.urllib.parse import urlencode\nfrom sklearn import cluster, covariance, manifold\n\nprint(__doc__)\n\n# #############################################################################\n# Retrieve the data from Internet\n\ndef retry(f, n_attempts=3):\n \"Wrapper function to retry function calls in case of exceptions\"\n def wrapper(*args, **kwargs):\n for i in range(n_attempts):\n try:\n return f(*args, **kwargs)\n except Exception as e:\n if i == n_attempts - 1:\n raise\n return wrapper\n\n\ndef quotes_historical_google(symbol, date1, date2):\n \"\"\"Get the historical data from Google finance.\n\n Parameters\n ----------\n symbol : str\n Ticker symbol to query for, for example ``\"DELL\"``.\n date1 : datetime.datetime\n Start date.\n date2 : datetime.datetime\n End date.\n\n Returns\n -------\n X : array\n The columns are ``date`` -- datetime, ``open``, ``high``,\n ``low``, ``close`` and ``volume`` of type float.\n \"\"\"\n params = urlencode({\n 'q': symbol,\n 'startdate': date1.strftime('%b %d, %Y'),\n 'enddate': date2.strftime('%b %d, %Y'),\n 'output': 'csv'\n })\n url = 'http://www.google.com/finance/historical?' 
+ params\n response = urlopen(url)\n dtype = {\n 'names': ['date', 'open', 'high', 'low', 'close', 'volume'],\n 'formats': ['object', 'f4', 'f4', 'f4', 'f4', 'f4']\n }\n converters = {0: lambda s: datetime.strptime(s.decode(), '%d-%b-%y')}\n return np.genfromtxt(response, delimiter=',', skip_header=1,\n dtype=dtype, converters=converters,\n missing_values='-', filling_values=-1)\n\n\n# Choose a time period reasonably calm (not too long ago so that we get\n# high-tech firms, and before the 2008 crash)\nd1 = datetime(2003, 1, 1)\nd2 = datetime(2008, 1, 1)\n\nsymbol_dict = {\n 'TOT': 'Total',\n 'XOM': 'Exxon',\n 'CVX': 'Chevron',\n 'COP': 'ConocoPhillips',\n 'VLO': 'Valero Energy',\n 'MSFT': 'Microsoft',\n 'IBM': 'IBM',\n 'TWX': 'Time Warner',\n 'CMCSA': 'Comcast',\n 'CVC': 'Cablevision',\n 'YHOO': 'Yahoo',\n 'DELL': 'Dell',\n 'HPQ': 'HP',\n 'AMZN': 'Amazon',\n 'TM': 'Toyota',\n 'CAJ': 'Canon',\n 'SNE': 'Sony',\n 'F': 'Ford',\n 'HMC': 'Honda',\n 'NAV': 'Navistar',\n 'NOC': 'Northrop Grumman',\n 'BA': 'Boeing',\n 'KO': 'Coca Cola',\n 'MMM': '3M',\n 'MCD': 'McDonald\\'s',\n 'PEP': 'Pepsi',\n 'K': 'Kellogg',\n 'UN': 'Unilever',\n 'MAR': 'Marriott',\n 'PG': 'Procter Gamble',\n 'CL': 'Colgate-Palmolive',\n 'GE': 'General Electrics',\n 'WFC': 'Wells Fargo',\n 'JPM': 'JPMorgan Chase',\n 'AIG': 'AIG',\n 'AXP': 'American express',\n 'BAC': 'Bank of America',\n 'GS': 'Goldman Sachs',\n 'AAPL': 'Apple',\n 'NYSE:SAP': 'SAP',\n 'CSCO': 'Cisco',\n 'TXN': 'Texas Instruments',\n 'XRX': 'Xerox',\n 'WMT': 'Wal-Mart',\n 'HD': 'Home Depot',\n 'GSK': 'GlaxoSmithKline',\n 'PFE': 'Pfizer',\n 'SNY': 'Sanofi-Aventis',\n 'NVS': 'Novartis',\n 'KMB': 'Kimberly-Clark',\n 'R': 'Ryder',\n 'GD': 'General Dynamics',\n 'RTN': 'Raytheon',\n 'CVS': 'CVS',\n 'CAT': 'Caterpillar',\n 'DD': 'DuPont de Nemours'}\n\nsymbols, names = np.array(sorted(symbol_dict.items())).T\n\n# retry is used because quotes_historical_google can temporarily fail\n# for various reasons (e.g. 
empty result from Google API).\nquotes = []\n\nfor symbol in symbols:\n print('Fetching quote history for %r' % symbol, file=sys.stderr)\n quotes.append(retry(quotes_historical_google)(symbol, d1, d2))\n\nclose_prices = np.vstack([q['close'] for q in quotes])\nopen_prices = np.vstack([q['open'] for q in quotes])\n\n# The daily variations of the quotes are what carry most information\nvariation = close_prices - open_prices\n\n\n# #############################################################################\n# Learn a graphical structure from the correlations\nedge_model = covariance.GraphLassoCV()\n\n# standardize the time series: using correlations rather than covariance\n# is more efficient for structure recovery\nX = variation.copy().T\nX /= X.std(axis=0)\nedge_model.fit(X)\n\n# #############################################################################\n# Cluster using affinity propagation\n\n_, labels = cluster.affinity_propagation(edge_model.covariance_)\nn_labels = labels.max()\n\nfor i in range(n_labels + 1):\n print('Cluster %i: %s' % ((i + 1), ', '.join(names[labels == i])))\n\n# #############################################################################\n# Find a low-dimension embedding for visualization: find the best position of\n# the nodes (the stocks) on a 2D plane\n\n# We use a dense eigen_solver to achieve reproducibility (arpack is\n# initiated with random vectors that we don't control). 
In addition, we\n# use a large number of neighbors to capture the large-scale structure.\nnode_position_model = manifold.LocallyLinearEmbedding(\n n_components=2, eigen_solver='dense', n_neighbors=6)\n\nembedding = node_position_model.fit_transform(X.T).T\n\n# #############################################################################\n# Visualization\nplt.figure(1, facecolor='w', figsize=(10, 8))\nplt.clf()\nax = plt.axes([0., 0., 1., 1.])\nplt.axis('off')\n\n# Display a graph of the partial correlations\npartial_correlations = edge_model.precision_.copy()\nd = 1 / np.sqrt(np.diag(partial_correlations))\npartial_correlations *= d\npartial_correlations *= d[:, np.newaxis]\nnon_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)\n\n# Plot the nodes using the coordinates of our embedding\nplt.scatter(embedding[0], embedding[1], s=100 * d ** 2, c=labels,\n cmap=plt.cm.spectral)\n\n# Plot the edges\nstart_idx, end_idx = np.where(non_zero)\n# a sequence of (*line0*, *line1*, *line2*), where::\n# linen = (x0, y0), (x1, y1), ... (xm, ym)\nsegments = [[embedding[:, start], embedding[:, stop]]\n for start, stop in zip(start_idx, end_idx)]\nvalues = np.abs(partial_correlations[non_zero])\nlc = LineCollection(segments,\n zorder=0, cmap=plt.cm.hot_r,\n norm=plt.Normalize(0, .7 * values.max()))\nlc.set_array(values)\nlc.set_linewidths(15 * values)\nax.add_collection(lc)\n\n# Add a label to each node. 
The challenge here is that we want to\n# position the labels to avoid overlap with other labels\nfor index, (name, label, (x, y)) in enumerate(\n zip(names, labels, embedding.T)):\n\n dx = x - embedding[0]\n dx[index] = 1\n dy = y - embedding[1]\n dy[index] = 1\n this_dx = dx[np.argmin(np.abs(dy))]\n this_dy = dy[np.argmin(np.abs(dx))]\n if this_dx > 0:\n horizontalalignment = 'left'\n x = x + .002\n else:\n horizontalalignment = 'right'\n x = x - .002\n if this_dy > 0:\n verticalalignment = 'bottom'\n y = y + .002\n else:\n verticalalignment = 'top'\n y = y - .002\n plt.text(x, y, name, size=10,\n horizontalalignment=horizontalalignment,\n verticalalignment=verticalalignment,\n bbox=dict(facecolor='w',\n edgecolor=plt.cm.spectral(label / float(n_labels)),\n alpha=.6))\n\nplt.xlim(embedding[0].min() - .15 * embedding[0].ptp(),\n embedding[0].max() + .10 * embedding[0].ptp(),)\nplt.ylim(embedding[1].min() - .03 * embedding[1].ptp(),\n embedding[1].max() + .03 * embedding[1].ptp())\n\nplt.show()"
`30`	`30`	`]`
`31`	`31`	`}`
`32`	`32`	`],`